My super title

Posted on Tue 21 April 2020 in yeah

These interactive plots are generated by Bokeh

import math print 'import done'
Covid19 Cases in San Diego with Bokeh

Plotting Covid-19 Cases according to Zipcodes using Bokeh

Import the required modules

import tabula
import requests
import datetime
import geopandas as gpd
import pandas as pd
import json
from dateutil import parser
from bokeh.io import reset_output, output_notebook, show

from bokeh.plotting import figure
from bokeh.models import Div, Column, Row


# Reset any previously configured Bokeh output state, then send plots to the notebook.
reset_output()
output_notebook()
Loading BokehJS ...

Download the Covid-19 case data from San Diego County

url = 'https://www.sandiegocounty.gov/content/dam/sdc/hhsa/programs/phs/Epidemiology/COVID-19%20Summary%20of%20Cases%20by%20Zip%20Code.pdf'
# A timeout keeps the cell from hanging forever on a stalled connection.
pdf = requests.get(url, timeout=60)
# Raise on 4xx/5xx so an HTML error page is never saved as if it were the PDF.
pdf.raise_for_status()
# One file per download day: the file name carries today's date.
with open(f'covid19_in_sd_{datetime.datetime.now().date()}.pdf','wb') as f:
    f.write(pdf.content)

Check if the pdf is downloaded

!ls *.pdf
covid19_in_sd_2020-04-18.pdf  covid19_in_sd_2020-04-20.pdf

covid19_in_sd_2020-04-19.pdf  covid19_in_sd_2020-04-21.pdf

Read the pdf and clean up the data

def tabula_convert_pdf_to_df(pdf):
    """Extract the per-zip-code case counts from the county PDF.

    The first page is read twice with tabula: once in stream mode to get the
    case table, and once without it to get the report title from the header
    of the first detected column.

    Args:
        pdf: Path of the PDF to read (passed straight to ``tabula.read_pdf``).

    Returns:
        A ``(title, total_count, df)`` tuple: the report title string, the
        sum of all case counts as an ``int``, and a DataFrame with the
        columns ``ZipCode`` (str) and ``CaseCount`` (int16).
    """
    raw_data = tabula.read_pdf(pdf, stream=True, pages=1)[0]
    # The title is the last '\r'-separated piece of the first column header.
    title = tabula.read_pdf(pdf, pages=1)[0].columns[0].split('\r')[-1]

    # The page holds two side-by-side (zip code, count) column pairs; stack
    # them into one long table. The counts of the first pair are read from
    # the 'Unnamed: 0' column.
    zip_codes = pd.concat([raw_data['Zip Code'].astype(str),
                           raw_data['Zip Code.1'].astype(str)])
    case_counts = pd.concat([raw_data['Unnamed: 0'], raw_data['Count.1']])

    # The last two stacked rows are discarded.
    # NOTE(review): presumably trailer rows (totals/blank), not real zip codes - confirm.
    df = pd.DataFrame({'ZipCode': zip_codes, 'CaseCount': case_counts})[:-2]

    df['CaseCount'] = df['CaseCount'].astype('int16')
    total_count = int(df['CaseCount'].sum())
    return title, total_count, df


# Parse one specific day's download; the file name is hard-coded, so change it to read another day's PDF.
title, total_count, count_data = tabula_convert_pdf_to_df('covid19_in_sd_2020-04-21.pdf')

Get the geojson file of communities

Later in this notebook, I want to plot the data on a map based on zip code geometry. To get the GeoJSON file, I used the GeoJSON export from https://data.sandiegocounty.gov/Maps-and-Geographical-Resources/Zip-Codes/vsuf-uefy. In addition to the zip code geometry, it also contains the name of the community each zip code belongs to.

# Load the zip-code boundaries from the GeoJSON export (expected in the working directory).
county_gpd = gpd.read_file('Sandiego_Zip_codes.geojson')
# Preview the first rows as the notebook cell output.
county_gpd.head()
community shape_star shape_stle zip geometry
0 Alpine 4149939944.16 326045.262676 91901 MULTIPOLYGON (((-116.74539 32.96063, -116.7408...
1 Bonita 273909416.836 113257.374615 91902 MULTIPOLYGON (((-116.97172 32.70838, -116.9712...
2 Boulevard 2735681408.51 241725.552214 91905 MULTIPOLYGON (((-116.23165 32.75083, -116.2280...
3 Campo 3066759065.62 287410.325075 91906 MULTIPOLYGON (((-116.35677 32.70460, -116.3572...
4 Chula Vista 403437442.009 112587.791814 91910 MULTIPOLYGON (((-117.06354 32.65011, -117.0634...

I then merge the GeoJSON zip code geometry data with the case count data. I do a right merge (how='right') with the zip code as the common key, so all the rows of the right-hand table (the case counts per zip code) are preserved.

# Right merge on the zip code: every row of count_data is kept, and the
# geometry/community columns are attached where county_gpd has a matching 'zip'.
# The redundant 'zip' key is then dropped and 'community' gets a clearer name.
merged = (
    county_gpd
    .merge(count_data, how='right', left_on='zip', right_on='ZipCode')
    .drop(columns=['zip'])
    .rename(columns={'community': 'CommunityName'})
)
# Preview the first rows as the notebook cell output.
merged.head()
CommunityName shape_star shape_stle geometry ZipCode CaseCount
0 Alpine 4149939944.16 326045.262676 MULTIPOLYGON (((-116.74539 32.96063, -116.7408... 91901 2
1 Bonita 273909416.836 113257.374615 MULTIPOLYGON (((-116.97172 32.70838, -116.9712... 91902 18
2 Boulevard 2735681408.51 241725.552214 MULTIPOLYGON (((-116.23165 32.75083, -116.2280... 91905 2
3 Chula Vista 403437442.009 112587.791814 MULTIPOLYGON (((-117.06354 32.65011, -117.0634... 91910 70
4 Chula Vista 329043951.316 93108.6951441 MULTIPOLYGON (((-117.04641 32.62846, -117.0463... 91911 98

Plot the case counts as a function of zip code and community

# Keep only the non-geometry columns; missing values in them (e.g. count rows
# with no matching zip code in the GeoJSON) are replaced with 'Unknown'.
nogeo_data = merged[['ZipCode', 'CaseCount', 'CommunityName']].fillna('Unknown')

Split the communities into three parts to plot them separately.

import numpy as np
# First part: all the zip codes of the 'San Diego' community.
split0 = nogeo_data.loc[nogeo_data['CommunityName'] == 'San Diego']

## Split all the other remaining rows into two halves.
# Positional slicing replaces np.array_split(df, 2), which on a DataFrame goes
# through the deprecated DataFrame.swapaxes. As with array_split, the first
# half gets the extra row when the row count is odd.
_others = nogeo_data.loc[nogeo_data['CommunityName'] != 'San Diego']
_half = (len(_others) + 1) // 2
split1, split2 = _others.iloc[:_half], _others.iloc[_half:]

# make groupby to create bokeh nested x range plots
sandiego = split0.groupby(by=['CommunityName', 'ZipCode'])
part1 = split1.groupby(by=['CommunityName', 'ZipCode'])
part2 = split2.groupby(by=['CommunityName', 'ZipCode'])
def create_plot(df):
    """Build a bar chart of case counts with a nested (community, zip code) x axis.

    Args:
        df: Grouped data used both as the figure's ``x_range`` and as the data
            source of the bars. The columns referenced here are
            ``CommunityName_ZipCode`` and ``CaseCount_mean``.

    Returns:
        The configured Bokeh figure.
    """
    hover_fields = [
        ("CaseCount", "@CaseCount_mean"),
        ("Community Name, ZipCode", "@CommunityName_ZipCode"),
    ]
    p = figure(x_range=df, plot_width=800, plot_height=250,
               toolbar_location=None, tooltips=hover_fields)
    p.vbar(source=df, x='CommunityName_ZipCode', top='CaseCount_mean',
           width=1, line_color="white")

    # Ranges: bars start at zero; pad the x range and the gaps between groups.
    p.y_range.start = 0
    p.x_range.range_padding = 0.05
    p.x_range.group_padding = 1.0

    # Frame: no vertical grid lines and no outline.
    p.xgrid.grid_line_color = None
    p.outline_line_color = None

    # Tilt the x labels; 22/28 rad is roughly pi/4 (about 45 degrees).
    label_angle = 22/28
    p.xaxis.major_label_orientation = label_angle
    p.xaxis.group_label_orientation = label_angle

    # Text: axis label and font sizes.
    p.yaxis.axis_label = "Case Count"
    p.title.text_font_size = "16pt"
    p.xaxis.major_label_text_font_size = "8pt"
    p.xaxis.group_text_font_size = "10pt"
    p.xaxis.axis_label_text_font_size = "16pt"
    p.yaxis.axis_label_text_font_size = "16pt"
    return p
# Stack the three bar charts in one column: San Diego first, then the two halves of the other communities.
bar_chart = Column( create_plot(sandiego), create_plot(part1), create_plot(part2))
# NOTE: Bokeh does not appear to sort the groupby categories; this looks like a Bokeh bug.

Choropleth Map of Covid 19 cases according to Zip Code in San Diego

# Convert the merged data to JSON. Rows with missing values are dropped first:
# the case counts reported as "unknown" have no geo information to draw.
merged_json = json.loads(merged.dropna().to_json())

# Bokeh needs the GeoJSON as a string, so serialise it once and reuse it.
json_data = json.dumps(merged_json)
from bokeh.models import GeoJSONDataSource, LinearColorMapper, ColorBar, HoverTool
from bokeh.palettes import brewer

geosource = GeoJSONDataSource(geojson = json_data)


#Define a sequential multi-hue color palette, in reversed color order.
palette = brewer['YlOrRd'][8][::-1]

#Instantiate LinearColorMapper that linearly maps numbers in a range, into a sequence of colors.
# The upper bound is fixed at 80 rather than taken from the data
# (e.g. max(merged['CaseCount'])).
color_mapper = LinearColorMapper(palette = palette, low = 0,
                                 high = 80 )

# One tick label every 10 cases. The last tick sits on the mapper's upper
# bound, so it is labelled ">80" to stand for every count beyond it.
tick_labels = {tick: str(tick) for tick in range(0, 80, 10)}
tick_labels[80] = ">80"

#Create color bar.
color_bar = ColorBar(color_mapper=color_mapper, label_standoff=8,
                     width = 500, height = 20,border_line_color=None,
                     location = (0,0),
                     orientation = 'horizontal',
                     major_label_overrides = tick_labels)

#Hover tool showing the case count, zip code and community of the patch under the cursor.
hover = HoverTool(
    tooltips=[
        ('CaseCount', '@CaseCount'),
        ('Zip Code', '@ZipCode'),
        ('Community Name', '@CommunityName'),
    ]
)

#Create the figure object: no axes, no toolbar, hover as the only tool.
map_figure = figure(
    plot_width=950,
    plot_height=1000,
    x_axis_location=None,
    y_axis_location=None,
    toolbar_location=None,
    tools=[hover],
)
map_figure.title.text_font_size = '16pt'
map_figure.xgrid.grid_line_color = None
map_figure.ygrid.grid_line_color = None

#Add the patch renderer: one polygon per zip code, filled according to its CaseCount.
map_figure.patches(
    'xs', 'ys',
    source=geosource,
    fill_color={'field': 'CaseCount', 'transform': color_mapper},
    fill_alpha=1,
    line_color='black',
    line_width=0.25,
)

#Place the color bar above the map.
map_figure.add_layout(color_bar, 'above')


# Headline block: report title (blue) and total case count (red) above the map.
map_collage = Column(
    Div(text=title, style={'font-size': '200%', 'color': 'blue'}),
    Div(text=f'Total number of cases: {total_count}',
        style={'font-size': '200%', 'color': 'red'}),
    map_figure,
)

# Final layout: the map block on top, the bar charts below.
# (Call show(map_figure) instead to preview the map on its own.)
collage = Column(map_collage, bar_chart)
show(collage)
# Generate a standalone html document with the collage of both plots

from bokeh.resources import CDN
from bokeh.embed import file_html

try:
    html1 = file_html(collage, CDN, 'Covid19 Cases in San Diego with Bokeh')
except Exception as e:
    # Rendering failed: report it and skip the export, so that no empty file
    # is created and the write does not fail on an unbound html1.
    print(e)
else:
    # Explicit utf-8 so the result does not depend on the platform's default encoding.
    with open('Covid19_apr20.html', 'w', encoding='utf-8') as f:
        f.write(html1)